blast <- read.csv("cannsyn_blast.csv")
blast[1:3,]
##       qseqid qlen     sseqid     slen qstart qend   sstart     send evalue
## 1 LY658671.1 1639 AH3Ma.chr7 66405468      4 1639 13455428 52948405      0
## 2 LY658671.1 1639 AH3Ma.chr7 66405468      4 1639 13367548 53036285      0
## 3 LY658671.1 1639 AH3Ma.chr7 66405468      4 1639 13306820 53097013      0
##   bitscore score length pident nident mismatch positive gapopen gaps  ppos
## 1     3016  1633   1636 99.939   1635        1     1635       0    0 99.94
## 2     3011  1630   1636 99.878   1634        2     1634       0    0 99.88
## 3     3011  1630   1636 99.878   1634        2     1634       0    0 99.88
##   sstrand
## 1   minus
## 2   minus
## 3   minus
table(blast$qseqid)
## 
## AB212829.1 AB212830.1 AB292682.1 AB292683.1 AB292684.1 LY658671.1 
##        792        792        283        785        753        792
cbdas <- blast[blast$qseqid == "AB292682.1", ]
#cbdas <- cbdas[cbdas$pident >= 98, ]
cbdas$sseqid <- sub("\\.chr7", "", cbdas$sseqid)
table(cbdas$sseqid)
## 
##  AH3Ma  AH3Mb   BCMa   BCMb  BOAXa  BOAXb  COFBa  COFBb  COSVa  COSVb  DPFBa 
##      6      5      5      5      4      2      2      5      5      4      4 
##  DPFBb  DPSVa  DPSVb  EH23a  EH23b  FCS1a  FCS1b GERv1a GERv1b   GRMa   GRMb 
##      2      4      4      3      4      2      4      5      4      4      5 
##  H3S1a  H3S1b  H3S7a  H3S7b KCDv1a KCDv1b  KOMPa  KOMPb  MBFBa  MBFBb  MBSVa 
##      4      6      4      2      5      5      5      5      2      2      2 
##  MBSVb MM3v1a MM3v1b  NLv1a  NLv1b   OFBa   OFBb  PPFBa  PPFBb   S8Ha   S8Hb 
##      4      1      1      2      2      6      2      2      2      4      3 
##  SAN2a  SAN2b  SDFBa  SDFBb  SHH5a  SHH5b  SKFBa  SKFBb SN1v3a SN1v3b  SODLa 
##      4      4      3      2      4      3      6      2      4      4      5 
##  SODLb   STHa   STHb SVA12a SVA12b  SVA6a  SVA6b  SZFBa  SZFBb  TKFBa  TKFBb 
##      2      1      3      4      4      4      4      6      2      6      2 
##  TWFBa  TWFBb  TWSVa  TWSVb   UFBa   UFBb  WCFBa  WCFBb   WHWa   WHWb  YMv2a 
##      3      2      3      4      5      2      2      2      6      6      5 
##  YMv2b 
##      5
tmp <- read.csv("cbdas_ncbi.csv")
#tmp <- tmp[tmp$pident >= 98, ]
tmp[1:3, ]
##       qseqid qlen sseqid     slen qstart qend   sstart     send evalue bitscore
## 1 AB292682.1 1635 Abacus 63009714      1 1635 52216283 52217917      0     3020
## 2 AB292682.1 1635 Abacus 63009714      1 1635 51977965 51979602      0     2141
## 3 AB292682.1 1635 Abacus 63009714      1 1635 52071820 52073457      0     2130
##   score length  pident nident mismatch positive gapopen gaps   ppos sstrand
## 1  1635   1635 100.000   1635        0     1635       0    0 100.00    plus
## 2  1159   1638  90.293   1479      156     1479       2    3  90.29    plus
## 3  1153   1638  90.171   1477      158     1477       2    3  90.17    plus
#table(tmp$sseqid)
#tmp[tmp$sseqid == "Cannbio-2", ]
#tmp[tmp$sseqid == "jl", ]

cbdas <- rbind(cbdas, tmp)
cbdas <- cbdas[cbdas$pident >= 98, ]
table(cbdas$sseqid)
## 
##    Abacus      BCMa      BCMb     BOAXa     BOAXb     CBDRx     COSVb     DPSVb 
##         1         1         1         1         1         1         1         1 
##     EH23b    Finola    GERv1a    GERv1b      GRMa      GRMb JL_Mother    KCDv1a 
##         1         1         1         1         1         1         1         1 
##    KCDv1b     KOMPa     KOMPb     MBSVb    MM3v1a    MM3v1b      S8Ha     SHH5a 
##         1         1         1         1         1         1         1         1 
##      STHa    SVA12a    SVA12b     SVA6a     SVA6b     TWSVb     YMv2a     YMv2b 
##         1         1         1         1         1         1         1         1

BUSCO input

busc <- read.csv("BUSCOchrom7.csv.gz")
busc[1:3, ]
##                           GRMb   MM3v1b    SKFBa     OFBa    SZFBa    AH3Ma
## BUSCO_ID=3166at71240  65286253 55579498 66713137 66317410 67061999 66051080
## BUSCO_ID=71319at71240 65023896 55317742 66443801 66039010 66799898 65772685
## BUSCO_ID=91349at71240 64908056 55208151 66346418 65939846 66684053 65673517
##                          H3S1b    TKFBa   GERv1a    NLv1a   KCDv1a YMv2b
## BUSCO_ID=3166at71240  65770433 67488932 71114214 70547443 68758573    NA
## BUSCO_ID=71319at71240 65490615 67210527 70812690 70285090 68465576    NA
## BUSCO_ID=91349at71240 65375514 67111361 70705281 70169250 68367101    NA
##                          DPFBa    SVA6b    SVA6a   KCDv1b    TWSVa    TWFBa
## BUSCO_ID=3166at71240  68355999 57025633 58432902 65888651 68210096 67991257
## BUSCO_ID=71319at71240 68088363 57289169 58696438 65598806 67914651 67695812
## BUSCO_ID=91349at71240 67980498 57392508 58799777 65500331 67799094 67580260
##                          H3S7a    FCS1b    COFBb    SDFBa    SODLb    SAN2a
## BUSCO_ID=3166at71240  66742958 67168769 67300029 67276384 70372662 66435220
## BUSCO_ID=71319at71240 66463142 66895217 67034789 67014022 70097785 66161668
## BUSCO_ID=91349at71240 66348043 66780115 66908047 66898181 69979250 66046566
##                          DPSVa YMv2a    SAN2b   MM3v1a    COSVa   SVA12a
## BUSCO_ID=3166at71240  68413070    NA 66271111 62100268 68014952 59529159
## BUSCO_ID=71319at71240 68145440    NA 65985206 61810612 67749716 59792695
## BUSCO_ID=91349at71240 68037572    NA 65870104 61681487 67622979 59896034
##                          SZFBb    SODLa    AH3Mb     WHWb     WHWa    EH23a
## BUSCO_ID=3166at71240  65451624 66312621 66119675 65986693 65652068 65404425
## BUSCO_ID=71319at71240 65172417 66017291 65841280 65723052 65377397 65125238
## BUSCO_ID=91349at71240 65040388 65926868 65742112 65604807 65269533 64993212
##                          FCS1a     STHb    PPFBa     STHa     S8Hb    COFBa
## BUSCO_ID=3166at71240  66154507 65152589 63952183 66153069 64094844 63557806
## BUSCO_ID=71319at71240 65886892 64864912 63662262 65865293 63815658 63295236
## BUSCO_ID=91349at71240 65779027 64769916 63563096 65736164 63683635 63194828
##                          SKFBb    KOMPb    NLv1b    SHH5b    PPFBb     UFBb
## BUSCO_ID=3166at71240  65578794 65476390 66486958 64526525 63914573 64982578
## BUSCO_ID=71319at71240 65299605 65178042 66224605 64247339 63635381 64720010
## BUSCO_ID=91349at71240 65167576 65076688 66108765 64113332 63503352 64619600
##                          WCFBb    DPFBb     OFBb     BCMa    H3S7b GERv1b
## BUSCO_ID=3166at71240  64346173 64872297 64884477 65092030 64090130     NA
## BUSCO_ID=71319at71240 64083603 64609728 64621907 64785483 63794690     NA
## BUSCO_ID=91349at71240 63983194 64509319 64521499 64669126 63679140     NA
##                          SDFBb    MBFBb    TWFBb    TKFBb     S8Ha    BOAXb
## BUSCO_ID=3166at71240  65193212 64544135 64941741 64958896 60849792 64705418
## BUSCO_ID=71319at71240 64914023 64281566 64662552 64679707 60587432 64416321
## BUSCO_ID=91349at71240 64781995 64181156 64530519 64547675 60471592 64290042
##                          KOMPa    H3S1a     UFBa    SHH5a     BCMb    MBFBa
## BUSCO_ID=3166at71240  59321660 64881477 64662190 63812134 63953562 64585476
## BUSCO_ID=71319at71240 64836183 64603086 64391554 63549769 63674897 64307082
## BUSCO_ID=91349at71240 64727827 64503922 64287856 63433924 63555982 64207915
##                          BOAXa    WCFBa    MBSVa     GRMa    MBSVb    EH23b
## BUSCO_ID=3166at71240  63842027 65540064 66565083 61742407 63031272 62296366
## BUSCO_ID=71319at71240 63558199 65261666 66286689 61480050 63294809 62559906
## BUSCO_ID=91349at71240 63452011 65162501 66187525 61364210 63398147 62663245
##                         SVA12b    DPSVb    TWSVb    COSVb CBDRX.CBDRX.chr7
## BUSCO_ID=3166at71240  61351590 63314921 61879113 63107633            96875
## BUSCO_ID=71319at71240 61615126 63578455 62142649 63371167           359821
## BUSCO_ID=91349at71240 61718465 63681794 62245985 63474504           463043
##                       CBDRx.NC_044378.1 ABAC.ABAC.chr7 Abacus.CM046076.1
## BUSCO_ID=3166at71240              96895         124294            124314
## BUSCO_ID=71319at71240            359829         395893            395901
## BUSCO_ID=91349at71240            463090         502517            502564
##                       FIN.FIN.chr7 Finola.CM011610.1 PK.PK.chr7
## BUSCO_ID=3166at71240        178504            178520         NA
## BUSCO_ID=71319at71240       482401            487657         NA
## BUSCO_ID=91349at71240           NA                NA         NA
##                       Purple_Kush.CM010797.2 Cannbio.2.CM028020.1
## BUSCO_ID=3166at71240                      NA                   NA
## BUSCO_ID=71319at71240                     NA               373985
## BUSCO_ID=91349at71240                     NA                   NA
##                       jl_Kyirong.CM022973.1
## BUSCO_ID=3166at71240                     NA
## BUSCO_ID=71319at71240                    NA
## BUSCO_ID=91349at71240                    NA

omit NCBI

# busc <- busc[, grep("CBDRx.NC_044378.1|Abacus.CM046076.1|Finola.CM011610.1|Purple_Kush.CM010797.2", colnames(busc), invert = TRUE)]
# colnames(busc)[ colnames(busc) == "CBDRX.CBDRX.chr7"] <- "CBDRx"
# colnames(busc)[ colnames(busc) == "ABAC.ABAC.chr7"] <- "Abacus"
# colnames(busc)[ colnames(busc) == "FIN.FIN.chr7"] <- "Finola"
# colnames(busc)[ colnames(busc) == "PK.PK.chr7"] <- "Purple Kush"
# colnames(busc)[ colnames(busc) == "Cannbio.2.CM028020.1"] <- "Cannbio-2"
# colnames(busc)[ colnames(busc) == "jl_Kyirong.CM022973.1"] <- "jl_Kyirong"

Omit publics

#busc <- busc[, grep("CBDRx.NC_044378.1|Abacus.CM046076.1|Finola.CM011610.1|Purple_Kush.CM010797.2", colnames(busc), invert = TRUE)]
busc <- busc[, grep("CBDRX.CBDRX.chr7|ABAC.ABAC.chr7|FIN.FIN.chr7|PK.PK.chr7", colnames(busc), invert = TRUE)]

colnames(busc)[ colnames(busc) == "CBDRx.NC_044378.1"] <- "CBDRx"
colnames(busc)[ colnames(busc) == "Abacus.CM046076.1"] <- "Abacus"
colnames(busc)[ colnames(busc) == "Finola.CM011610.1"] <- "Finola"
colnames(busc)[ colnames(busc) == "Purple_Kush.CM010797.2"] <- "Purple Kush"
colnames(busc)[ colnames(busc) == "Cannbio.2.CM028020.1"] <- "Cannbio-2"
colnames(busc)[ colnames(busc) == "jl_Kyirong.CM022973.1"] <- "jl_Kyirong"
busc[1:3, 1:8]
##                           GRMb   MM3v1b    SKFBa     OFBa    SZFBa    AH3Ma
## BUSCO_ID=3166at71240  65286253 55579498 66713137 66317410 67061999 66051080
## BUSCO_ID=71319at71240 65023896 55317742 66443801 66039010 66799898 65772685
## BUSCO_ID=91349at71240 64908056 55208151 66346418 65939846 66684053 65673517
##                          H3S1b    TKFBa
## BUSCO_ID=3166at71240  65770433 67488932
## BUSCO_ID=71319at71240 65490615 67210527
## BUSCO_ID=91349at71240 65375514 67111361
busc[1:3, (ncol(busc)-4):ncol(busc)]
##                       Abacus Finola Purple Kush Cannbio-2 jl_Kyirong
## BUSCO_ID=3166at71240  124314 178520          NA        NA         NA
## BUSCO_ID=71319at71240 395901 487657          NA    373985         NA
## BUSCO_ID=91349at71240 502564     NA          NA        NA         NA
library(BUSCOplot)
library(ggplot2)
class(unlist(busc["BUSCO_ID=100415at71240", ]))
## [1] "integer"
#busc <- busc[ , 60:ncol(busc)]
colnames(busc)
##  [1] "GRMb"        "MM3v1b"      "SKFBa"       "OFBa"        "SZFBa"      
##  [6] "AH3Ma"       "H3S1b"       "TKFBa"       "GERv1a"      "NLv1a"      
## [11] "KCDv1a"      "YMv2b"       "DPFBa"       "SVA6b"       "SVA6a"      
## [16] "KCDv1b"      "TWSVa"       "TWFBa"       "H3S7a"       "FCS1b"      
## [21] "COFBb"       "SDFBa"       "SODLb"       "SAN2a"       "DPSVa"      
## [26] "YMv2a"       "SAN2b"       "MM3v1a"      "COSVa"       "SVA12a"     
## [31] "SZFBb"       "SODLa"       "AH3Mb"       "WHWb"        "WHWa"       
## [36] "EH23a"       "FCS1a"       "STHb"        "PPFBa"       "STHa"       
## [41] "S8Hb"        "COFBa"       "SKFBb"       "KOMPb"       "NLv1b"      
## [46] "SHH5b"       "PPFBb"       "UFBb"        "WCFBb"       "DPFBb"      
## [51] "OFBb"        "BCMa"        "H3S7b"       "GERv1b"      "SDFBb"      
## [56] "MBFBb"       "TWFBb"       "TKFBb"       "S8Ha"        "BOAXb"      
## [61] "KOMPa"       "H3S1a"       "UFBa"        "SHH5a"       "BCMb"       
## [66] "MBFBa"       "BOAXa"       "WCFBa"       "MBSVa"       "GRMa"       
## [71] "MBSVb"       "EH23b"       "SVA12b"      "DPSVb"       "TWSVb"      
## [76] "COSVb"       "CBDRx"       "Abacus"      "Finola"      "Purple Kush"
## [81] "Cannbio-2"   "jl_Kyirong"
# LETTERS[1:10] %in% LETTERS[3:4]
#colnames(busc)[ !colnames(busc) %in% cbdas$sseqid ]
busc <- busc[, colnames(busc) %in% cbdas$sseqid]
#colnames(busc)
#names(busc) %in% cbdas$sseqid
cbdas <- cbdas[grep("JL_Mother", cbdas$sseqid, invert = TRUE), ]
myx <- factor(cbdas$sseqid, levels = colnames(busc))

p <- gg_line_map(busc, check_table = FALSE, size = 1.2, lalpha = 0.2)
## Warning in gg_line_map(busc, check_table = FALSE, size = 1.2, lalpha = 0.2):
## Check for BUSCO table disabled, the user is responsible for results!
p

p <- p + annotate( geom = "line", x = as.numeric(myx), y = cbdas$sstart, linewidth = 1.2, color = "#1E90FF")
p <- p + annotate( geom = "point", x = myx, y = cbdas$sstart, shape = 24, bg = "#1E90FF", size = 4)
p

# ggsave(filename = "ggbusco_lineplot_chrom7_public.png",
#        device = "png", width = 6.5, height = 4.5, units = "in", dpi = 300)
busc <- busc[ , sort.int(colnames(busc) ,index.return = TRUE)$ix]

busc <- cbind(busc[ , grep("CBDRx|Abacus|Finola", colnames(busc), invert = TRUE)],  busc[ , c("CBDRx", "Abacus", "Finola")])


myx <- factor(cbdas$sseqid, levels = colnames(busc))

p <- gg_line_map(busc, check_table = FALSE, size = 1.2, lalpha = 0.2)
## Warning in gg_line_map(busc, check_table = FALSE, size = 1.2, lalpha = 0.2):
## Check for BUSCO table disabled, the user is responsible for results!
p

p <- p + annotate( geom = "line", x = as.numeric(myx), y = cbdas$sstart, linewidth = 1.2, color = "#1E90FF")
p <- p + annotate( geom = "point", x = myx, y = cbdas$sstart, shape = 24, bg = "#1E90FF", size = 4)
p